Import Packages

# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("oligo")
#package for reading CEL files, will add more packages for further steps

Import Raw Data

csv_file <- read.table(file = r"(C:\Users\linde\rmarkdown\affy_processing\data_runsheets\GLDS_6_runsheet.csv)", 
                       sep=",",
                       header = TRUE,
                       check.names = FALSE
                       )
array_data_files <- csv_file["Path to Raw Data File"] #column name of paths

raw_data <- oligo::read.celfiles(array_data_files) #ExpressionFeatureSet Class
## Platform design info loaded.
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_CTRL_Rep1_GSM144733_raw1.CEL.gz
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_CTRL_Rep2_GSM144734_raw.CEL.gz
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_CTRL_Rep3_GSM144735_raw.CEL.gz
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_IR_56Fe_Rep1_GSM144736_raw.CEL.gz
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_IR_56Fe_Rep2_GSM144737_raw.CEL.gz
## Reading in : C:\Users\linde\rmarkdown\input_data\GLDS6\GLDS-6_array_Rnor_SDR_keratinocytes_IR_56Fe_Rep3_GSM144738_raw.CEL.gz

Verify Data

print("Verify Data step")
## [1] "Verify Data step"

Quality Analysis

QA: Visualization

density_plot <- hist(raw_data, transfo=log2, which=c("pm", "mm", "bg", "both", "all"),
                    nsample=10000, target = "core", main = "Density Plot")

## TODO: Add sample legend   
#legend(15, 0.35, legend=c(raw_data[rownames]), lty=1:2, cex = 1)

transfo : used to scale the data

which : defines specific probe types

nsample : sample size used to produce plot

target : specifies group of meta-probeset

main : main title

pseudoimage <- image(raw_data, transfo=log2)

MA_plot <- MAplot(raw_data)
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'

## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'

### QA: Statistics

# BiocManager::install("AnnotationDbi")
# BiocManager::install("Biobase")

IQRray

# The IQRray statistic is obtained by ranking all the probe intensities from
# a given array and by computing the average rank for each probe set. The
# interquartile range (IQR) of the probe sets average ranks serves then as
# quality score. 
#Example of a low score: 122783.6
#   high score: 226124.6
library(methods)
library(AnnotationDbi)
library(Biobase)

    #data - Affybatch object obtained after reading in celfiles into R with function ReadAffy from package affy
    
    #obtaining intensity values for perfect match (pm) probes
pm_data<-pm(raw_data)
    
    #ranking probe intensities for every array 
rank_data<-apply(pm_data,2,rank)
    
    #obtaining names of probeset for every probe
probeNames<-probeNames(raw_data)
    
    #function computing IQR of mean probe ranks in probesets 
get_IQR<-function(rank_data,probeNames){round(IQR(sapply(split(rank_data,probeNames),mean)),digits=2)}
    
    #computing arIQR score
IQRray_score<-apply(rank_data,2,get_IQR,probeNames=probeNames)  

Normalize Data

#rma (Robust Multiarray Averaging)
#plm (Probe Level Models)
#normalized_data <- rma(raw_data, background=TRUE, normalize=TRUE, subset=NULL)
norm_probe <- normalize(raw_data)
## Normalizing... OK
norm_probe_table <- exprs(norm_probe)
#row names : genes
#column names : samples
norm_gene <- rma(raw_data)
## Background correcting
## Normalizing
## Calculating Expression
norm_gene_table <- exprs(norm_gene)
#probe_level <- fitProbeLevelModel(raw_data, background=TRUE, normalize=TRUE, target="core", method="plm", verbose=TRUE, S4 = FALSE)
raw_table <- exprs(raw_data)

Verify Normalization

print("Verify Normalization step")
## [1] "Verify Normalization step"

DGE Analysis

print("DGE Analysis step")
## [1] "DGE Analysis step"

Gene Annotations

print("Gene Annotations step")
## [1] "Gene Annotations step"